##
## 0 1 2 4
## 4664 3355 46 2
Among the veteran population, 57.8157927% (N=4664) do not have a valid ONET 55 job (missing date);
41.5891905% (N=3355) have one valid ONET 55 job;
0.5702244% (N=46) have 2 valid ONET 55 jobs;
0.0247924% (N=2) have 4 valid ONET 55 jobs.
Problem: All 55-ONET jobs are missing the ONET job zone. The most frequent 55-ONET job, 55-1019.00, is "Military Officer Special and Tactical Operations Leaders" (all military officer special and tactical operations leaders not listed separately).
The second most frequent 55-ONET job is 55-3019.00, "Military Enlisted Tactical Operations and Air/Weapons Specialists and Crew Members" (all military enlisted tactical operations and air/weapons specialists and crew members not listed separately).
ONET states that it does not collect data on military occupations.
## # A tibble: 13 x 3
## # Groups: onet [13]
## onet onet_job_zone N
## <chr> <dbl> <int>
## 1 55-1019.00 NA 2224
## 2 55-3019.00 NA 663
## 3 55-3014.00 NA 126
## 4 55-3017.00 NA 117
## 5 55-3016.00 NA 101
## 6 55-1016.00 NA 97
## 7 55-1015.00 NA 61
## 8 55-3018.00 NA 32
## 9 55-3013.00 NA 17
## 10 55-3015.00 NA 7
## 11 55-3012.00 NA 6
## 12 55-1011.00 NA 3
## 13 55-3011.00 NA 1
We have 3412 veterans in our sample who have a valid ONET 55 job (with a start and an end date). The average year in which they ended their last military job (ONET 55 job) is 2000, with a standard deviation of 10 years.
# Build the job-spell table used for sequence analysis.
# Keep only jobs that started after the date the ONET 55 (military) job ended,
# and record the calendar year in which each of those jobs started.
bg_vet_job_seq <- bg_vet_job %>%
  select(
    id, end_year, onet_job_zone, startdate, enddate, job_duration_day,
    date_end_onet55, year_enter_job_market
  ) %>%
  filter(startdate > date_end_onet55) %>%
  mutate(start_year = year(startdate))

# Timeline plot: one horizontal segment per job spell, from its start year to
# its end year, with one row per id. Years are mapped as character values,
# exactly as in the original chunk.
ggplot(bg_vet_job_seq) +
  geom_segment(
    aes(
      x = as.character(start_year),
      xend = as.character(end_year),
      y = id,
      yend = id
    ),
    size = 0.05,
    color = "purple"
  ) +
  theme_classic() +
  labs(x = "year", title = "Time in Work Force-Veteran")

# Re-express start/end years relative to year_enter_job_market, so that the
# year of entry is year 1. Rows are then sorted by descending job zone, so
# that when several spells share the same id/start_year/end_year, distinct()
# keeps the first one, i.e. the spell with the highest job zone.
# (The original chunk called group_by(id) twice; once is equivalent.)
bg_vet_job_seq <- bg_vet_job_seq %>%
  mutate(
    start_year = start_year - year_enter_job_market + 1,
    end_year = end_year - year_enter_job_market + 1
  ) %>%
  select(id, start_year, end_year, onet_job_zone) %>%
  group_by(id) %>%
  arrange(desc(onet_job_zone)) %>%
  distinct(id, start_year, end_year, .keep_all = TRUE)

table(bg_vet_job_seq$onet_job_zone)
##
## 1 2 3 4 5
## 33 1093 1587 5035 1584
length(unique(bg_vet_job_seq$id)) # confirm sample size match
## [1] 2732
# Convert the spell table to a plain data frame before handing it to
# seqformat(). The round trip through as.matrix() discards the tibble classes.
# NOTE(review): as.matrix() also coerces every column to one common type.
bg_vet_job_seq <- as.data.frame(as.matrix(bg_vet_job_seq))

# Range of the relative start years
min(bg_vet_job_seq$start_year)
## [1] 1
max(bg_vet_job_seq$start_year)
## [1] 49

# Spell format (one row per job) -> STS format (one row per id, one column
# per year).
sts_vet <- seqformat(
  bg_vet_job_seq,
  from = "SPELL", to = "STS",
  id = "id", begin = "start_year", end = "end_year",
  status = "onet_job_zone", process = FALSE
)

# Here we are renaming columns to be in format "yn" (year in the job market).
# seq_len() is used instead of 1:ncol() so a zero-column table cannot produce
# the bogus index c(1, 0).
names(sts_vet) <- paste0("y", seq_len(ncol(sts_vet)))
nrow(sts_vet) # total number of samples
## [1] 2732

# Build the state sequence object; delete missing values at front and end,
# keep internal gaps as NA.
vet.seq <- seqdef(sts_vet, left = "DEL", gaps = "NA", right = "DEL")
class(vet.seq)
## [1] "stslist" "data.frame"

# example of three sequences
vet.seq[1, ]
## Sequence
## 16149347 5-5-5-5-5-5-5-5-5-4-4-4-4-4-4
vet.seq[2, ]
## Sequence
## 16109564 5-5-5-5-5-5-5-5-4-4-4-4
vet.seq[100, ]
## Sequence
## 2263327 2-2
# Substitution-cost matrices
# seqsubm() is called with method = "TRATE" and method = "CONSTANT".

# TRATE costs, rounded to two decimals (missing state included)
cost_matrix_trate <- round(
  seqsubm(vet.seq, method = "TRATE", with.missing = TRUE),
  2
)
cost_matrix_trate
## 1-> 2-> 3-> 4-> 5-> NA->
## 1-> 0.00 1.94 1.98 1.96 1.98 1.95
## 2-> 1.94 0.00 1.95 1.93 1.98 1.93
## 3-> 1.98 1.95 0.00 1.92 1.97 1.90
## 4-> 1.96 1.93 1.92 0.00 1.91 1.80
## 5-> 1.98 1.98 1.97 1.91 0.00 1.90
## NA-> 1.95 1.93 1.90 1.80 1.90 0.00

# CONSTANT costs (missing state included)
cost_matrix_constant <- seqsubm(
  vet.seq,
  method = "CONSTANT",
  with.missing = TRUE
)
# vet.seq.OM <- seqdist(vet.seq, method = "OM", indel = 3, sm = cost_matrix_constant, with.missing = TRUE)
# clusterward <- agnes(vet.seq.OM, diss = TRUE, method = "ward")
# saveRDS(clusterward, file = "data/clusterward_onet55_complete_constant_matrix.rds")
# The clustering is not recomputed here; a cached result is loaded instead.
# NOTE(review): the file read below ends in "_constant_cost_matrix.rds" while
# the commented-out saveRDS() above writes "_constant_matrix.rds" -- confirm
# that this is the intended file.
clusterward <- readRDS(file = "~/git/dspg20career/data/clusterward_onet55_complete_constant_cost_matrix.rds")

# dendrogram
plot(clusterward, which.plots = 2)

# k = 3: cut the tree and label the clusters
cluster3 <- cutree(clusterward, k = 3)
cluster3 <- factor(cluster3, labels = paste("Type", 1:3))
table(cluster3)
## cluster3
## Type 1 Type 2 Type 3
## 1248 840 644
# longitudinal plot
seqfplot(vet.seq, group = cluster3, pbarw = TRUE)
# another cluster plot
seqmtplot(vet.seq, group = cluster3)

# k = 5
cluster5 <- cutree(clusterward, k = 5)
cluster5 <- factor(cluster5, labels = paste("Type", 1:5))
table(cluster5)
## cluster5
## Type 1 Type 2 Type 3 Type 4 Type 5
## 1248 351 335 644 154
# longitudinal plot
seqfplot(vet.seq, group = cluster5, pbarw = TRUE)
# another cluster plot
seqmtplot(vet.seq, group = cluster5)

# k = 6
cluster6 <- cutree(clusterward, k = 6)
cluster6 <- factor(cluster6, labels = paste("Type", 1:6))
table(cluster6)
## cluster6
## Type 1 Type 2 Type 3 Type 4 Type 5 Type 6
## 1248 152 335 644 199 154
# longitudinal plot
seqfplot(vet.seq, group = cluster6, pbarw = TRUE)
# another cluster plot
seqmtplot(vet.seq, group = cluster6)
# vet.seq.OM <- seqdist(vet.seq, method = "OM", indel = 3, sm = cost_matrix_trate, with.missing = TRUE)
# clusterward <- agnes(vet.seq.OM, diss = TRUE, method = "ward")
# saveRDS(clusterward, file = "data/clusterward_onet55_complete_trate_matrix.rds")
# Load the cached clustering result instead of recomputing it.
# NOTE(review): the file read below ends in "_trate_cost_matrix.rds" while the
# commented-out saveRDS() above writes "_trate_matrix.rds" -- confirm that this
# is the intended file.
clusterward <- readRDS(file = "~/git/dspg20career/data/clusterward_onet55_complete_trate_cost_matrix.rds")

# dendrogram
plot(clusterward, which.plots = 2)

# k = 5: cut the tree and label the clusters
cluster5 <- cutree(clusterward, k = 5)
cluster5 <- factor(cluster5, labels = paste("Type", 1:5))
table(cluster5)
## cluster5
## Type 1 Type 2 Type 3 Type 4 Type 5
## 1032 119 187 477 917
# longitudinal plot
seqfplot(vet.seq, group = cluster5, pbarw = TRUE)
# another cluster plot
seqmtplot(vet.seq, group = cluster5)

# Rebuild the job-spell table from bg_vet_job: keep jobs that started after
# the date the ONET 55 job ended, then express start and end years relative to
# year_enter_job_market (the year of entry becomes year 1).
bg_vet_job_seq <- bg_vet_job %>%
  select(
    id, end_year, onet_job_zone, startdate, enddate, job_duration_day,
    date_end_onet55, year_enter_job_market
  ) %>%
  filter(startdate > date_end_onet55) %>%
  mutate(
    start_year = year(startdate) - year_enter_job_market + 1,
    end_year = end_year - year_enter_job_market + 1
  )

# Distribution of the relative start years
hist(bg_vet_job_seq$start_year)

# obtain jobs that appear after 10 years they exit military
# Only spells whose relative start year is greater than 10 are kept. Rows are
# sorted by descending job zone so that distinct() keeps the highest job zone
# among spells sharing the same id/start_year/end_year.
# (The original chunk called group_by(id) twice; once is equivalent.)
bg_vet_job_seq <- bg_vet_job_seq %>%
  filter(start_year > 10) %>%
  select(id, start_year, end_year, onet_job_zone) %>%
  group_by(id) %>%
  arrange(desc(onet_job_zone)) %>%
  distinct(id, start_year, end_year, .keep_all = TRUE)

table(bg_vet_job_seq$onet_job_zone)
##
## 1 2 3 4 5
## 21 609 914 3612 1115
length(unique(bg_vet_job_seq$id)) # confirm sample size match
## [1] 2024
# Convert the spell table to a plain data frame before handing it to
# seqformat(). The round trip through as.matrix() discards the tibble classes.
# NOTE(review): as.matrix() also coerces every column to one common type.
bg_vet_job_seq <- as.data.frame(as.matrix(bg_vet_job_seq))

# Range of the relative start years
min(bg_vet_job_seq$start_year)
## [1] 11
max(bg_vet_job_seq$start_year)
## [1] 49

# Spell format (one row per job) -> STS format (one row per id, one column
# per year).
sts_vet <- seqformat(
  bg_vet_job_seq,
  from = "SPELL", to = "STS",
  id = "id", begin = "start_year", end = "end_year",
  status = "onet_job_zone", process = FALSE
)

# Here we are renaming columns to be in format "yn" (year in the job market).
# seq_len() is used instead of 1:ncol() so a zero-column table cannot produce
# the bogus index c(1, 0).
# NOTE(review): the smallest start_year here is 11, so with process = FALSE
# the first STS column presumably corresponds to year 11, not year 1; the
# "y1", "y2", ... labels would then be offset by 10 -- confirm against the
# seqformat() documentation (tmin).
names(sts_vet) <- paste0("y", seq_len(ncol(sts_vet)))
nrow(sts_vet) # total number of samples
## [1] 2024

# Build the state sequence object; delete missing values at front and end,
# keep internal gaps as NA.
vet.seq <- seqdef(sts_vet, left = "DEL", gaps = "NA", right = "DEL")
class(vet.seq)
## [1] "stslist" "data.frame"

# example of three sequences
vet.seq[1, ]
## Sequence
## 16149347 5-5-5-5-5-5-5-5-5-4-4-4-4-4-4
vet.seq[2, ]
## Sequence
## 16109564 5-5-5-5-5-5-5-5-4-4-4-4
vet.seq[100, ]
## Sequence
## 17742816 4-4-NA-NA-NA-NA-5-5-5-5-5-5
# Substitution-cost matrices
# seqsubm() is called with method = "TRATE" and method = "CONSTANT".

# TRATE costs, rounded to two decimals (missing state included)
cost_matrix_trate <- round(
  seqsubm(vet.seq, method = "TRATE", with.missing = TRUE),
  2
)
cost_matrix_trate
## 1-> 2-> 3-> 4-> 5-> NA->
## 1-> 0.00 1.94 1.98 1.92 1.96 1.92
## 2-> 1.94 0.00 1.95 1.94 1.98 1.94
## 3-> 1.98 1.95 0.00 1.92 1.97 1.91
## 4-> 1.92 1.94 1.92 0.00 1.91 1.78
## 5-> 1.96 1.98 1.97 1.91 0.00 1.90
## NA-> 1.92 1.94 1.91 1.78 1.90 0.00

# CONSTANT costs (missing state included)
cost_matrix_constant <- seqsubm(
  vet.seq,
  method = "CONSTANT",
  with.missing = TRUE
)
# vet.seq.OM <- seqdist(vet.seq, method = "OM", indel = 3, sm = cost_matrix_constant, with.missing = TRUE)
# clusterward <- agnes(vet.seq.OM, diss = TRUE, method = "ward")
# saveRDS(clusterward, file = "data/clusterward_onet55_after10yrs_constant_matrix.rds")
# Load the cached clustering result instead of recomputing it.
clusterward <- readRDS(file = "~/git/dspg20career/data/clusterward_onet55_after10yrs_constant_matrix.rds")

# dendrogram
plot(clusterward, which.plots = 2)

# k = 3: cut the tree and label the clusters
cluster3 <- cutree(clusterward, k = 3)
cluster3 <- factor(cluster3, labels = paste("Type", 1:3))
table(cluster3)
## cluster3
## Type 1 Type 2 Type 3
## 870 910 244
# longitudinal plot
seqfplot(vet.seq, group = cluster3, pbarw = TRUE)
# another cluster plot
seqmtplot(vet.seq, group = cluster3)

# k = 4
cluster4 <- cutree(clusterward, k = 4)
cluster4 <- factor(cluster4, labels = paste("Type", 1:4))
table(cluster4)
## cluster4
## Type 1 Type 2 Type 3 Type 4
## 194 910 676 244
# longitudinal plot
seqfplot(vet.seq, group = cluster4, pbarw = TRUE)
# another cluster plot
seqmtplot(vet.seq, group = cluster4)

# k = 5
cluster5 <- cutree(clusterward, k = 5)
cluster5 <- factor(cluster5, labels = paste("Type", 1:5))
table(cluster5)
## cluster5
## Type 1 Type 2 Type 3 Type 4 Type 5
## 194 910 457 244 219
# longitudinal plot
seqfplot(vet.seq, group = cluster5, pbarw = TRUE)
# another cluster plot
seqmtplot(vet.seq, group = cluster5)

# k = 6
cluster6 <- cutree(clusterward, k = 6)
cluster6 <- factor(cluster6, labels = paste("Type", 1:6))
table(cluster6)
## cluster6
## Type 1 Type 2 Type 3 Type 4 Type 5 Type 6
## 194 910 457 244 169 50
# longitudinal plot
seqfplot(vet.seq, group = cluster6, pbarw = TRUE)
# another cluster plot
seqmtplot(vet.seq, group = cluster6)

# k = 7
cluster7 <- cutree(clusterward, k = 7)
cluster7 <- factor(cluster7, labels = paste("Type", 1:7))
table(cluster7)
## cluster7
## Type 1 Type 2 Type 3 Type 4 Type 5 Type 6 Type 7
## 194 910 457 193 169 51 50
# longitudinal plot
seqfplot(vet.seq, group = cluster7, pbarw = TRUE)
# another cluster plot
seqmtplot(vet.seq, group = cluster7)

# Rebuild the job-spell table from bg_vet_job: keep jobs that started after
# the date the ONET 55 job ended, then express start and end years relative to
# year_enter_job_market (the year of entry becomes year 1).
bg_vet_job_seq <- bg_vet_job %>%
  select(
    id, end_year, onet_job_zone, startdate, enddate, job_duration_day,
    date_end_onet55, year_enter_job_market
  ) %>%
  filter(startdate > date_end_onet55) %>%
  mutate(
    start_year = year(startdate) - year_enter_job_market + 1,
    end_year = end_year - year_enter_job_market + 1
  )

# Rows are sorted by descending job zone so that distinct() keeps the highest
# job zone among spells sharing the same id/start_year/end_year.
# (The original chunk called group_by(id) twice; once is equivalent.)
bg_vet_job_seq <- bg_vet_job_seq %>%
  select(id, start_year, end_year, onet_job_zone) %>%
  group_by(id) %>%
  arrange(desc(onet_job_zone)) %>%
  distinct(id, start_year, end_year, .keep_all = TRUE)

table(bg_vet_job_seq$onet_job_zone)
##
## 1 2 3 4 5
## 33 1093 1587 5035 1584
length(unique(bg_vet_job_seq$id)) # confirm sample size match
## [1] 2732
# Convert the spell table to a plain data frame before handing it to
# seqformat(). The round trip through as.matrix() discards the tibble classes.
# NOTE(review): as.matrix() also coerces every column to one common type.
bg_vet_job_seq <- as.data.frame(as.matrix(bg_vet_job_seq))

# Range of the relative start years
min(bg_vet_job_seq$start_year)
## [1] 1
max(bg_vet_job_seq$start_year)
## [1] 49

# Spell format (one row per job) -> STS format (one row per id, one column
# per year).
sts_vet <- seqformat(
  bg_vet_job_seq,
  from = "SPELL", to = "STS",
  id = "id", begin = "start_year", end = "end_year",
  status = "onet_job_zone", process = FALSE
)
class(sts_vet)
## [1] "data.frame"

# Keep only the first 10 columns of the STS table. (The original comment said
# "after 10 years", but the code keeps columns 1 to 10.)
# NOTE(review): presumably these are years 1-10, since the smallest start_year
# is 1 -- confirm against the seqformat() documentation.
sts_vet <- sts_vet[, 1:10]

# Here we are renaming columns to be in format "yn" (year in the job market).
# seq_len() is used instead of 1:ncol() so a zero-column table cannot produce
# the bogus index c(1, 0).
names(sts_vet) <- paste0("y", seq_len(ncol(sts_vet)))
nrow(sts_vet) # total number of samples
## [1] 2732

# Build the state sequence object; delete missing values at front and end,
# keep internal gaps as NA.
# NOTE(review): the first two example sequences below print as empty, so some
# ids apparently have no state in the retained columns yet still count towards
# the 2732 rows -- confirm that empty sequences are meant to be clustered.
vet.seq <- seqdef(sts_vet, left = "DEL", gaps = "NA", right = "DEL")
class(vet.seq)
## [1] "stslist" "data.frame"

# example of three sequences
vet.seq[1, ]
## Sequence
## 16149347
vet.seq[2, ]
## Sequence
## 16109564
vet.seq[100, ]
## Sequence
## 2263327 2-2
# Substitution-cost matrices
# seqsubm() is called with method = "TRATE" and method = "CONSTANT".

# TRATE costs, rounded to two decimals (missing state included)
cost_matrix_trate <- round(
  seqsubm(vet.seq, method = "TRATE", with.missing = TRUE),
  2
)
cost_matrix_trate
## 1-> 2-> 3-> 4-> 5-> NA->
## 1-> 0.00 1.87 2.00 2.00 2.00 1.93
## 2-> 1.87 0.00 1.93 1.93 1.96 1.90
## 3-> 2.00 1.93 0.00 1.90 1.95 1.83
## 4-> 2.00 1.93 1.90 0.00 1.88 1.77
## 5-> 2.00 1.96 1.95 1.88 0.00 1.85
## NA-> 1.93 1.90 1.83 1.77 1.85 0.00

# CONSTANT costs (missing state included)
cost_matrix_constant <- seqsubm(
  vet.seq,
  method = "CONSTANT",
  with.missing = TRUE
)
# vet.seq.OM <- seqdist(vet.seq, method = "OM", indel = 3, sm = cost_matrix_constant, with.missing = TRUE)
# clusterward <- agnes(vet.seq.OM, diss = TRUE, method = "ward")
# saveRDS(clusterward, file = "data/clusterward_onet55_10yrs_constant_matrix.rds")
# Load the cached clustering result instead of recomputing it.
clusterward <- readRDS(file = "~/git/dspg20career/data/clusterward_onet55_10yrs_constant_matrix.rds")

# dendrogram
plot(clusterward, which.plots = 2)

# k = 3: cut the tree and label the clusters
cluster3 <- cutree(clusterward, k = 3)
cluster3 <- factor(cluster3, labels = paste("Type", 1:3))
table(cluster3)
## cluster3
## Type 1 Type 2 Type 3
## 1764 605 363
# longitudinal plot
seqfplot(vet.seq, group = cluster3, pbarw = TRUE)
# another cluster plot
seqmtplot(vet.seq, group = cluster3)

# k = 4
cluster4 <- cutree(clusterward, k = 4)
cluster4 <- factor(cluster4, labels = paste("Type", 1:4))
table(cluster4)
## cluster4
## Type 1 Type 2 Type 3 Type 4
## 1301 463 605 363
# longitudinal plot
seqfplot(vet.seq, group = cluster4, pbarw = TRUE)
# another cluster plot
seqmtplot(vet.seq, group = cluster4)

# k = 5
# The fifth label was "Type5" (no space) in the original chunk; it is now
# "Type 5", consistent with every other cluster label. The recorded output
# header below has been updated to match.
cluster5 <- cutree(clusterward, k = 5)
cluster5 <- factor(cluster5, labels = paste("Type", 1:5))
table(cluster5)
## cluster5
## Type 1 Type 2 Type 3 Type 4 Type 5
## 1301 463 333 363 272
# longitudinal plot
seqfplot(vet.seq, group = cluster5, pbarw = TRUE)
# another cluster plot
seqmtplot(vet.seq, group = cluster5)

# k = 6
cluster6 <- cutree(clusterward, k = 6)
cluster6 <- factor(cluster6, labels = paste("Type", 1:6))
table(cluster6)
## cluster6
## Type 1 Type 2 Type 3 Type 4 Type 5 Type 6
## 1301 463 150 363 272 183
# longitudinal plot
seqfplot(vet.seq, group = cluster6, pbarw = TRUE)
# another cluster plot
seqmtplot(vet.seq, group = cluster6)

# k = 7
cluster7 <- cutree(clusterward, k = 7)
cluster7 <- factor(cluster7, labels = paste("Type", 1:7))
table(cluster7)
## cluster7
## Type 1 Type 2 Type 3 Type 4 Type 5 Type 6 Type 7
## 1301 463 150 363 235 183 37
# longitudinal plot
seqfplot(vet.seq, group = cluster7, pbarw = TRUE)
# another cluster plot
seqmtplot(vet.seq, group = cluster7)